#########################################################################
#  File-Name: analyses_draft_2.R
#  Date: 02/01/2021
#  Author: Mustafa Mikdat Yildirim
#  Purpose: Data Analysis for the Paper: "Short of Suspension: How Suspension Warnings Can Reduce Hate Speech on Twitter"
#  Out File: Figures and Tables in the Paper
#  Data In: CSV dataset with the following variables:
##### "seed_user_screen", "screen_name"
##### "anonymity", "follower_count", "activity", "age", 
##### "pre_count_hate_ratio_w1", "pre_count_hate_ratio_w4"
##### "post_count_hate_ratio_w1", "post_count_hate_ratio_w4"
##### "pre_hate_w1", "pre_hate_w4", "post_hate_w1", "post_hate_w4"
##### "treatment_binary", "treatment_arm"
##### "pre_treatment_count_w1", "pre_treatment_count_w4"
#  Data Out: no data out
#  Prev file: analyses_draft_1.R
#########################################################################

### Variable definitions:
# "seed_user_screen": anonymized id of the suspended user
# "screen_name": anonymized id of the suspended user's follower
# "anonymity": user's anonymity level
# "follower_count": user's number of followers 
# "activity": user's daily average number of tweets prior to the treatment
# "age": log number of days since the creation of user's profile
# "pre_count_hate_ratio_w1": number of hateful tweets/all the tweets over the week before the treatment
# "pre_count_hate_ratio_w4": number of hateful tweets/all the tweets over the 4 weeks before the treatment
# "post_count_hate_ratio_w1": number of hateful tweets/all the tweets over the week after the treatment
# "post_count_hate_ratio_w4": number of hateful tweets/all the tweets over the 4 weeks after the treatment
# "pre_hate_w1": daily average number of hateful words over the week before the treatment
# "pre_hate_w4": daily average number of hateful words over the 4 weeks before the treatment
# "post_hate_w1": daily average number of hateful words over the week after the treatment
# "post_hate_w4": daily average number of hateful words over the week after the treatment
# "treatment_binary": binary variable that denotes all the users who received a treatment
# "treatment_arm": categorical variable that denotes each treatment arm and the control group
# "pre_treatment_count_w1": total number of tweets that the user tweeted over the week before the treatment
# "pre_treatment_count_w4": total number of tweets that the user tweeted over the 4 weeks before the treatment

############### FIGURES AND TABLES ARE CREATED IN THE ORDER THEY ARE FOUND IN THE PAPER ####################
rm(list = ls())
# Load required packages
pacman::p_load(readxl, plm, clubSandwich, haven,
               foreign, miceadds, dplyr, sandwich, lmtest,
               linkcomm, statnet, gtools, ggplot2, reticulate, 
               writexl, sjPlot, sjmisc, pastecs, data.table,
               interactions, coefplot, pastecs, stringr, log4r)

# Load dataset
final_data <- read.csv("~/Desktop/replication_final/final_data.csv")
final_data <- final_data[,-1]

############ Figure 2 ############
#### Table that shows number of followers per each suspended user 
followers_per_user <- as.data.frame(table(final_data$seed_user_screen))
followers_per_user <- arrange(followers_per_user, by=Freq)
followers_per_user$Var1 <- c("user_1", "user_2", "user_3", "user_4", "user_5", "user_6",
                             "user_7", "user_8", "user_9", "user_10", "user_11", "user_12",
                             "user_13", "user_14", "user_15", "user_16", "user_17", "user_18",
                             "user_19", "user_20", "user_21", "user_22", "user_23", "user_24",
                             "user_25", "user_26", "user_27")

#### Barplot that shows number of followers per each suspended user
barplot(followers_per_user$Freq,
        names.arg= followers_per_user$Var1,
        las=2,
        main="Number of Followers per Suspended User")


############ Table 1 ############
## Get rid of NA values to obtain summary statistics
descriptive <- final_data[!is.na(final_data$pre_count_hate_ratio_w4) & 
                            !is.na(final_data$post_count_hate_ratio_w4),]
descriptive <- descriptive[!is.na(descriptive$anonymity) & descriptive$anonymity!="NaN",]
## Select only relevant columns
descriptive <- final_data[,c("follower_count","activity","age","pre_count_hate_ratio_w4","anonymity")]
## Create the dataset for summary statistics of relevant variables
summary_stats <- as.data.frame(stat.desc(descriptive))
summary_stats <- summary_stats[c("mean","median","std.dev","min","max"),]
summary_stats <- round(summary_stats,digits=2)

############ Figure 4  ############
##Hypothesis 1: A tweet that warns a user of a potential suspension in the case of 
#employing hate speech will lead that user to decrease their use of 
#hateful language.

## Run relevant regressions
week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_binary + 
               pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data)
week_4 <- lm(post_count_hate_ratio_w4 ~ treatment_binary + 
               pre_count_hate_ratio_w4 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data)


## Use saved regressions as arguments for multiplot function
multiplot(week_1, week_4, coefficients=c("treatment_binary"), 
          newNames=c(treatment_binary = "Treated"), 
          names=c("  1 Week", "  4 Weeks "), title="", scales="free_x",
          decreasing = TRUE, lwdOuter = 0.4, lwdInner = 0.8,
          sort = "alphabetical", innerCI=1.645, outerCI=1.96, single=TRUE,  zeroType = 0,
          legend.position="bottom") +
  scale_color_manual(values=c("black", "gray")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
                     axis.line = element_line(colour = "black"), text = element_text(size=15),
                     axis.text.y = element_text(size=20, face="bold"))+
  xlab("Difference in Hateful Tweets/All Tweets Ratio")+ 
  geom_vline(aes(xintercept = 0), size = .5, linetype = "dashed")


############ Figure 5 ############
## Hypothesis 2: The more the warning tweet pushes the user to perceive their 
# potential suspension as costly, the more likely it is that 
# the user will decrease their use of hateful language.

## Hypothesis 3:
# The more credible the warning tweet is in terms of convincing 
# the user that they might get suspended as a result of using 
# hateful language, the more likely it is that the user will decrease 
# their use of hateful language.


## Hypothesis 4:
# The more credible the warning tweet is in terms of convincing 
# the user that they might get suspended as a result of using hateful language, 
# the more likely it is that the user will decrease their use of hateful language.

week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
               pre_count_hate_ratio_w1 + pre_treatment_count_w1 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data)

coefs_for_plot <- data.frame(
  group = c("Low Legitimacy","High Legitimacy","Low Credibility","High Credibility",
            "Low Cost", "High Cost"),
  estimate = c(summary(week_1)$coefficients['treatment_armlow_legitimacy',1],
               
               summary(week_1)$coefficients['treatment_armhigh_legitimacy',1],
               
               summary(week_1)$coefficients['treatment_armlow_credibility',1],
               
               summary(week_1)$coefficients['treatment_armhigh_credibility',1],
               
               summary(week_1)$coefficients['treatment_armlow_cost',1],
               
               summary(week_1)$coefficients['treatment_armhigh_cost',1]
  ),
  se = c(summary(week_1)$coefficients['treatment_armlow_legitimacy',2],
         
         summary(week_1)$coefficients['treatment_armhigh_legitimacy',2],
         
         summary(week_1)$coefficients['treatment_armlow_credibility',2],
         
         summary(week_1)$coefficients['treatment_armhigh_credibility',2],
         
         summary(week_1)$coefficients['treatment_armlow_cost',2],
         
         summary(week_1)$coefficients['treatment_armhigh_cost',2]
  )
)


coefs_for_plot$order <-  c(8.75, 
                           7.25,
                           5.75,
                           4.25,
                           2.75,
                           1.25)



ggplot(data = coefs_for_plot, aes(y = estimate, x = order)) +
  geom_errorbar(data = coefs_for_plot, lwd=1, width=0.0, aes(x = order, 
                                                             ymin = estimate + qnorm(0.025)*se, 
                                                             ymax = estimate + qnorm(0.975)*se)
  ) +
  geom_point(size = 3.75) + 
  scale_y_continuous("Difference in Hateful Tweet Ratio", limits = c(-0.03,0.01),breaks = seq(-0.03,0.02,.01)) +
  scale_x_continuous("",breaks=seq(8.75,1.25,length.out=6),limits=c(1,9),labels=c("Low Legitimacy","High Legitimacy",
                                                                                  "Low Credibility","High Credibility",
                                                                                  "Low Cost", "High Cost")) +
  geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.text.x = element_text(size=15),
        axis.text.y = element_text(size=15, face="bold"),
        axis.title.x = element_text(size=15),
        legend.position = "none")+
  coord_flip() 



##################################### APPENDIX #####################################

### Figure A1: Main findings with missing data imputed as 0: Following Munger 2017
final_data_missed <- final_data
# Impute missing values of post treatment hate speech with 0
final_data_missed$post_count_hate_ratio_w1[is.na(final_data_missed$post_count_hate_ratio_w1)] <- 0
final_data_missed$post_count_hate_ratio_w4[is.na(final_data_missed$post_count_hate_ratio_w4)] <- 0

week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
               pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data_missed)
week_4 <- lm(post_count_hate_ratio_w4 ~ treatment_arm + 
               pre_count_hate_ratio_w4 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data_missed)


multiplot(week_1, week_4, coefficients=c("treatment_armlow_legitimacy", "treatment_armhigh_legitimacy", 
                                         "treatment_armlow_credibility", "treatment_armhigh_credibility",
                                         "treatment_armlow_cost", "treatment_armhigh_cost"), 
          newNames=c(treatment_armlow_legitimacy = "1: Low Legitimacy ", 
                     treatment_armhigh_legitimacy = "2: High Legitimacy ", 
                     treatment_armlow_credibility = "3: Low Credibility", 
                     treatment_armhigh_credibility = "4: High Credibility", 
                     treatment_armlow_cost = "5: Low Cost ",
                     treatment_armhigh_cost = "6: High Cost "), 
          names=c("  1 Week","  4 Weeks "), title="", scales="free_x",
          decreasing = TRUE,
          sort = "alphabetical", innerCI=1.645, outerCI=1.96, single=FALSE,  zeroType = 0,legend.position="none") +
  scale_color_manual(values=c("red", "blue", "seagreen", "black")) +
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
                     legend.position="none", axis.line = element_line(colour = "black"), 
                     text = element_text(size=15), axis.text.y = element_text(size=20, face="bold"))+
  ylab("Treatments") + xlab("Difference in Hateful Tweet Ratio")+ geom_vline(aes(xintercept = 0), size = .5, linetype = "dashed")


############ Figures B1 and B2: Power Analyses in the Appendix ############ 
### Create a function for simulation that is repeated once
simulation <- function(suspended_users = 20,
                      sample_followers = 49, 
                      no_treatment = 6, # number of treatment conditions in the paper
                      low_effect = -0.17,
                      high_effect = -0.34, ## Main effect from Munger (2017)
                      heterogeneous = FALSE){
  ### Create ids for suspended users and for their followers
  suspended_users_col <- rep(1:suspended_users,each=sample_followers)
  sample_followers_col <- c(1:(sample_followers*suspended_users) )
  ### Create treatment conditions
  treatment_conditions <- rep(rep(1:(no_treatment+1),each = (sample_followers/(no_treatment+1)) ),suspended_users )
  mydata <- data.frame( seed_user = suspended_users_col, 
                       follower= sample_followers_col, 
                       treatment_arm = treatment_conditions) 
  
  ### Create a normal distribution of baseline hate values with mean 3 and sd 1
  mydata$baseline <- NA
  for (i in 1:(length(unique(mydata$seed_user)))){
    mydata$baseline[mydata$seed_user==i] <- rnorm(n = length(mydata$baseline[mydata$seed_user==i]),
                                                 mean = 3,
                                                 sd = 1)
  }
  
  ### Assign as a normal distribution number of followers with mean=120 and sd=20 to each simulated subject 
  mydata$no_followers <- NA
  for (i in 1:(length(unique(mydata$seed_user)))){
    mydata$no_followers[mydata$seed_user==i] <- rnorm(n = length(mydata$no_followers[mydata$seed_user==i]),
                                                     mean = 120,
                                                     sd = 20)
  }
  
  ### Assign as a normal distribution anonymity scores with mean=1 and sd=1 to each simualted subject
  mydata$anonimity <- NA
  for (i in 1:(length(unique(mydata$seed_user)))){
    mydata$anonimity[mydata$seed_user==i] <- rnorm(n = length(mydata$anonimity[mydata$seed_user==i]),
                                                  mean = 1,
                                                  sd = 1)
  }
  
  
  mydata$effect <- NA
  
  ### Simulate hypothesized treatment effects with mean -0.17 and sd=1 that are also correlated
  ### with anonymity and follower no scores
  for (i in seq(2,(length(unique(mydata$treatment_arm))), by = 2 ) ){
    mydata$effect[mydata$treatment_arm==i] <- rnorm(n = length(mydata$effect[mydata$treatment_arm==i]),
                                                   mean = -0.17, sd = 1) - 
      3.936e-07 * mydata$no_followers[mydata$treatment_arm==i] +
      0.2 * mydata$anonimity[mydata$treatment_arm==i] 
  }
  
  ### Simulate hypothesized treatment effects with mean -0.34 and sd=1 that are also correlated
  ### with anonymity and follower no scores
  for (i in seq(3,(length(unique(mydata$treatment_arm))), by = 2 ) ){
    mydata$effect[mydata$treatment_arm==i] <- rnorm(n = length(mydata$effect[mydata$treatment_arm==i]),
                                                   mean = -0.34, sd = 1) + 
      3.127e-07 * mydata$no_followers[mydata$treatment_arm==i] +
      0.2 * mydata$anonimity[mydata$treatment_arm==i] 
  }
  
  ### Simulate no treatment effect for the contro arm
  mydata$effect[mydata$treatment_arm==1] <- 0
  
  ### Simulate outcome variables for treatment arms
  mydata$outcome <- mydata$baseline + mydata$effect
  mydata$treatment_arm <- as.factor(mydata$treatment_arm)
  mydata$seed_user <- as.factor(mydata$seed_user)
  
  
  if(heterogeneous==FALSE){
    mod <- lm(outcome~seed_user + treatment_arm + baseline, data = mydata)
    mod <- summary(mod)
    p_value <- coefficients(mod)['treatment_arm3','Pr(>|t|)']
  }
  else{
    mod <- lm(outcome~seed_user + treatment_arm*no_followers + baseline, data = mydata)
    mod <- summary(mod)
    p_value <- coefficients(mod)['treatment_arm2:no_followers','Pr(>|t|)']
  }
  
  return(p_value<=0.05)
}


suspended_users <- seq(5,75,5)
powers <- NA
# Running time is long, remove ## if you want to run
#for(i in 1:length(suspended_users)){
#  powers[i] <- mean(replicate(1000,
#                             simulation(suspended_users=suspended_users[i],
#                                        sample_followers=49, 
#                                        no_treatment=6,
#                                        heterogeneous = TRUE)) )
#}
# Plot the power analysis results: FIGURE B1
plotdata <- as.data.frame(cbind(suspended_users,powers))
ggplot()+
  geom_line(data=plotdata,aes(y=powers,x= suspended_users),size=1 ) +
  labs(x = "Number of Seed Users / 49 Followers Each",y = "Power") +
  coord_cartesian(xlim=range(suspended_users)) +
  theme(panel.background = element_blank(),
        panel.grid.major.x = element_line(color = "grey80"),
        panel.grid.major.y = element_line(color = "grey80")) +
  geom_hline(yintercept=0.8) +
  scale_x_continuous(breaks=suspended_users) 



sample_followers <- seq(35,119,7)
powers <- NA
# Running time is long, remove ## if you want to run
#for(i in 1:length(suspended_users)){
#  powers[i] <- mean(replicate(1000,
#                             simulation(suspended_users=20,
#                                        sample_followers=sample_followers[i], 
#                                        no_treatment=6)) )
#}
# Plot the power analysis results: FIGURE B2
plotdata <- as.data.frame(cbind(sample_followers,powers))
ggplot()+
  geom_line(data=plotdata,aes(y=powers,x= sample_followers),size=1 ) +
  labs(x = "Number of Followers for Each User / 25 Seed Users",y = "Power") +
  coord_cartesian(xlim=range(sample_followers)) +
  theme(panel.background = element_blank(),
        panel.grid.major.x = element_line(color = "grey80"),
        panel.grid.major.y = element_line(color = "grey80")) +
  geom_hline(yintercept=0.8) +
  scale_x_continuous(breaks=sample_followers) 

############ Figure C1: Heterogeneous Treatment Effects: Number of Followers ############ 

final_data_low_follower <- final_data[final_data$follower_count<=median(final_data$follower_count),]
final_data_high_follower <- final_data[final_data$follower_count>median(final_data$follower_count),]

week_1_low <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                   pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                   as.factor(seed_user_screen),data=final_data_low_follower)

week_1_high <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                    pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                    as.factor(seed_user_screen),data=final_data_high_follower)

coefs_for_plot <- data.frame(
  group = c(rep(c("Low Legitimacy","High Legitimacy","Low Credibility","High Credibility",
                  "Low Cost", "High Cost"),each=2)),
  estimate = c(summary(week_1_low)$coefficients[2,1],
               summary(week_1_high)$coefficients[2,1],
               
               summary(week_1_low)$coefficients[3,1],
               summary(week_1_high)$coefficients[3,1],
               
               summary(week_1_low)$coefficients[4,1],
               summary(week_1_high)$coefficients[4,1],
               
               summary(week_1_low)$coefficients[5,1],
               summary(week_1_high)$coefficients[5,1],
               
               summary(week_1_low)$coefficients[6,1],
               summary(week_1_high)$coefficients[6,1],
               
               summary(week_1_low)$coefficients[7,1],
               summary(week_1_high)$coefficients[7,1]
  ),
  se = c(summary(week_1_low)$coefficients[2,2],
         summary(week_1_high)$coefficients[2,2],
         
         summary(week_1_low)$coefficients[3,2],
         summary(week_1_high)$coefficients[3,2],
         
         summary(week_1_low)$coefficients[4,2],
         summary(week_1_high)$coefficients[4,2],
         
         summary(week_1_low)$coefficients[5,2],
         summary(week_1_high)$coefficients[5,2],
         
         summary(week_1_low)$coefficients[6,2],
         summary(week_1_high)$coefficients[6,2],
         
         summary(week_1_low)$coefficients[7,2],
         summary(week_1_high)$coefficients[7,2]
  )
)

coefs_for_plot$followers <- factor(rep(c("Low Number of Followers","High Number of Followers"),6),levels = c("Low Number of Followers","High Number of Followers"),ordered=T)
coefs_for_plot$order <-  c(9,8.5, 
                           7.5, 7,
                           6, 5.5,
                           4.5, 4,
                           3, 2.5,
                           1.5, 1)
coefs_for_plot$fontface <- rep(c("plain","bold","plain","bold","plain","bold"),each=2)




############ Figure C2: Heterogeneous Treatment Effects: Anonymity Level ############
week_1_an2 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                   pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                   as.factor(seed_user_screen),data=final_data[final_data$anonymity==2,])
week_1_an1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                   pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                   as.factor(seed_user_screen),data=final_data[final_data$anonymity==1,])
week_1_an0 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                   pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                   as.factor(seed_user_screen),data=final_data[final_data$anonymity==0,])


coefs_for_plot <- data.frame(
  group = c(rep(c("Low Legitimacy","High Legitimacy","Low Credibility","High Credibility",
                  "Low Cost", "High Cost"),each=3)),
  estimate = c(summary(week_1_an2)$coefficients[2,1],
               summary(week_1_an1)$coefficients[2,1],
               summary(week_1_an0)$coefficients[2,1],
               
               summary(week_1_an2)$coefficients[3,1],
               summary(week_1_an1)$coefficients[3,1],
               summary(week_1_an0)$coefficients[3,1],
               
               summary(week_1_an2)$coefficients[4,1],
               summary(week_1_an1)$coefficients[4,1],
               summary(week_1_an0)$coefficients[4,1],
               
               summary(week_1_an2)$coefficients[5,1],
               summary(week_1_an1)$coefficients[5,1],
               summary(week_1_an0)$coefficients[5,1],
               
               summary(week_1_an2)$coefficients[6,1],
               summary(week_1_an1)$coefficients[6,1],
               summary(week_1_an0)$coefficients[6,1],
               
               summary(week_1_an2)$coefficients[7,1],
               summary(week_1_an1)$coefficients[7,1],
               summary(week_1_an0)$coefficients[7,1]
  ),
  se = c(summary(week_1_an2)$coefficients[2,2],
         summary(week_1_an1)$coefficients[2,2],
         summary(week_1_an0)$coefficients[2,2],
         
         summary(week_1_an2)$coefficients[3,2],
         summary(week_1_an1)$coefficients[3,2],
         summary(week_1_an0)$coefficients[3,2],
         
         summary(week_1_an2)$coefficients[4,2],
         summary(week_1_an1)$coefficients[4,2],
         summary(week_1_an0)$coefficients[4,2],
         
         summary(week_1_an2)$coefficients[5,2],
         summary(week_1_an1)$coefficients[5,2],
         summary(week_1_an0)$coefficients[5,2],
         
         summary(week_1_an2)$coefficients[6,2],
         summary(week_1_an1)$coefficients[6,2],
         summary(week_1_an0)$coefficients[6,2],
         
         summary(week_1_an2)$coefficients[7,2],
         summary(week_1_an1)$coefficients[7,2],
         summary(week_1_an0)$coefficients[7,2]
  )
)

coefs_for_plot$anonymity <- factor(rep(c("High Anonymity","Medium Anonymity","Low Anonymity"),6),levels = c("High Anonymity","Medium Anonymity","Low Anonymity"),ordered=T)
coefs_for_plot$order <-  c(9, 8.75, 8.5, 
                           7.5, 7.25, 7,
                           6, 5.75, 5.5,
                           4.5, 4.25, 4,
                           3, 2.75, 2.5,
                           1.5, 1.25, 1)
coefs_for_plot$fontface <- rep(c("plain","bold","plain","bold","plain","bold"),each=3)


ggplot(data = coefs_for_plot, aes(y = estimate, x = order,color=anonymity)) +
  geom_errorbar(data = coefs_for_plot, lwd=1, width=0.0, aes(x = order, 
                                                             ymin = estimate + qnorm(0.025)*se, 
                                                             ymax = estimate + qnorm(0.975)*se,
                                                             col=anonymity)
  ) +
  geom_point(size = 3.75,aes(shape=anonymity)) + 
  geom_text(data = coefs_for_plot,
            aes(y = estimate + qnorm(0.975)*se, x=order,label=anonymity),hjust=-0.15) + 
  scale_y_continuous("Difference in Hateful Tweet Ratio", limits = c(-0.04,0.05),breaks = seq(-0.04,0.05,.01)) +
  scale_x_continuous("",breaks=seq(9,1.25,length.out=6),limits=c(1,9),labels=c("Low Legitimacy","High Legitimacy",
                                                                               "Low Credibility","High Credibility",
                                                                               "Low Cost", "High Cost")) +
  scale_color_manual(breaks = c("High Anonymity","Medium Anonymity","Low Anonymity"),values=c("blue","red","green")) + 
  scale_shape_manual(values = c("High Anonymity" = 19,"Medium Anonymity" = 17, "Low Anonymity" = 15)) + 
  geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  coord_flip() +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=15),
        axis.title.x = element_text(size=20),
        axis.text.y = element_text(size=20, face="bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.position = "none")


############ Figure C3: Heterogeneous Treatment Effects, Activity Level ############ 
final_data_low_activity <- final_data[final_data$activity<=median(final_data$activity),]
final_data_high_activity <- final_data[final_data$activity>median(final_data$activity),]

week_1_low_activity <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                            pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                            as.factor(seed_user_screen),data=final_data_low_activity)
week_1_high_activity <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                             pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                             as.factor(seed_user_screen),data=final_data_high_activity)

coefs_for_plot <- data.frame(
  group = c(rep(c("Low Legitimacy","High Legitimacy","Low Credibility","High Credibility",
                  "Low Cost", "High Cost"),each=2)),
  estimate = c(summary(week_1_low_activity)$coefficients[2,1],
               summary(week_1_high_activity)$coefficients[2,1],
               
               summary(week_1_low_activity)$coefficients[3,1],
               summary(week_1_high_activity)$coefficients[3,1],
               
               summary(week_1_low_activity)$coefficients[4,1],
               summary(week_1_high_activity)$coefficients[4,1],
               
               summary(week_1_low_activity)$coefficients[5,1],
               summary(week_1_high_activity)$coefficients[5,1],
               
               summary(week_1_low_activity)$coefficients[6,1],
               summary(week_1_high_activity)$coefficients[6,1],
               
               summary(week_1_low_activity)$coefficients[7,1],
               summary(week_1_high_activity)$coefficients[7,1]
  ),
  se = c(summary(week_1_low_activity)$coefficients[2,2],
         summary(week_1_high_activity)$coefficients[2,2],
         
         summary(week_1_low_activity)$coefficients[3,2],
         summary(week_1_high_activity)$coefficients[3,2],
         
         summary(week_1_low_activity)$coefficients[4,2],
         summary(week_1_high_activity)$coefficients[4,2],
         
         summary(week_1_low_activity)$coefficients[5,2],
         summary(week_1_high_activity)$coefficients[5,2],
         
         summary(week_1_low_activity)$coefficients[6,2],
         summary(week_1_high_activity)$coefficients[6,2],
         
         summary(week_1_low_activity)$coefficients[7,2],
         summary(week_1_high_activity)$coefficients[7,2]
  )
)

coefs_for_plot$activity <- factor(rep(c("Low Level of Activity","High Level of Activity"),6),levels = c("Low Level of Activity","High Level of Activity"),ordered=T)
coefs_for_plot$order <-  c(9,8.5, 
                           7.5, 7,
                           6, 5.5,
                           4.5, 4,
                           3, 2.5,
                           1.5, 1)
coefs_for_plot$fontface <- rep(c("plain","bold","plain","bold","plain","bold"),each=2)


ggplot(data = coefs_for_plot, aes(y = estimate, x = order,color=activity)) +
  geom_errorbar(data = coefs_for_plot, lwd=1, width=0.0, aes(x = order, 
                                                             ymin = estimate + qnorm(0.025)*se, 
                                                             ymax = estimate + qnorm(0.975)*se,
                                                             col=activity)
  ) +
  geom_point(size = 3.75,aes(shape=activity)) + 
  geom_text(data = coefs_for_plot,
            aes(y = estimate + qnorm(0.975)*se, x=order,label=activity),hjust=-0.15) + 
  scale_y_continuous("Difference in Hateful Tweet Ratio", limits = c(-0.05,0.05),breaks = seq(-0.04,0.05,.01)) +
  scale_x_continuous("",breaks=seq(8.75,1.25,length.out=6),limits=c(1,9),labels=c("Low Legitimacy","High Legitimacy",
                                                                                  "Low Credibility","High Credibility",
                                                                                  "Low Cost", "High Cost")) +
  scale_color_manual(breaks = c("Low Level of Activity","High Level of Activity"),values=c("blue","red")) + 
  scale_shape_manual(values = c("Low Level of Activity" = 19,"High Level of Activity" = 17)) + 
  geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  coord_flip() +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=15),
        axis.title.x = element_text(size=20),
        axis.text.y = element_text(size=20, face="bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.position = "none")


############ Figure C4: Heterogeneous Treatment Effects, Age ############ 
final_data_low_age <- final_data[final_data$activity<=median(final_data$age),]
final_data_high_age <- final_data[final_data$activity>median(final_data$age),]

week_1_low_age <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                       pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                       as.factor(seed_user_screen),data=final_data_low_age)
week_1_high_age <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                        pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                        as.factor(seed_user_screen),data=final_data_high_age)

coefs_for_plot <- data.frame(
  group = c(rep(c("Low Legitimacy","High Legitimacy","Low Credibility","High Credibility",
                  "Low Cost", "High Cost"),each=2)),
  estimate = c(summary(week_1_low_age)$coefficients[2,1],
               summary(week_1_high_age)$coefficients[2,1],
               
               summary(week_1_low_age)$coefficients[3,1],
               summary(week_1_high_age)$coefficients[3,1],
               
               summary(week_1_low_age)$coefficients[4,1],
               summary(week_1_high_age)$coefficients[4,1],
               
               summary(week_1_low_age)$coefficients[5,1],
               summary(week_1_high_age)$coefficients[5,1],
               
               summary(week_1_low_age)$coefficients[6,1],
               summary(week_1_high_age)$coefficients[6,1],
               
               summary(week_1_low_age)$coefficients[7,1],
               summary(week_1_high_age)$coefficients[7,1]
  ),
  se = c(summary(week_1_low_age)$coefficients[2,2],
         summary(week_1_high_age)$coefficients[2,2],
         
         summary(week_1_low_age)$coefficients[3,2],
         summary(week_1_high_age)$coefficients[3,2],
         
         summary(week_1_low_age)$coefficients[4,2],
         summary(week_1_high_age)$coefficients[4,2],
         
         summary(week_1_low_age)$coefficients[5,2],
         summary(week_1_high_age)$coefficients[5,2],
         
         summary(week_1_low_age)$coefficients[6,2],
         summary(week_1_high_age)$coefficients[6,2],
         
         summary(week_1_low_age)$coefficients[7,2],
         summary(week_1_high_age)$coefficients[7,2]
  )
)

coefs_for_plot$age <- factor(rep(c("Low Level of Age","High Level of Age"),6),levels = c("Low Level of Age","High Level of Age"),ordered=T)
coefs_for_plot$order <-  c(9,8.5, 
                           7.5, 7,
                           6, 5.5,
                           4.5, 4,
                           3, 2.5,
                           1.5, 1)
coefs_for_plot$fontface <- rep(c("plain","bold","plain","bold","plain","bold"),each=2)


ggplot(data = coefs_for_plot, aes(y = estimate, x = order,color=age)) +
  geom_errorbar(data = coefs_for_plot, lwd=1, width=0.0, aes(x = order, 
                                                             ymin = estimate + qnorm(0.025)*se, 
                                                             ymax = estimate + qnorm(0.975)*se,
                                                             col=age)
  ) +
  geom_point(size = 3.75,aes(shape=age)) + 
  geom_text(data = coefs_for_plot,
            aes(y = estimate + qnorm(0.975)*se, x=order,label=age),hjust=-0.15) + 
  scale_y_continuous("Difference in Hateful Tweet Ratio", limits = c(-0.05,0.05),breaks = seq(-0.04,0.05,.01)) +
  scale_x_continuous("",breaks=seq(8.75,1.25,length.out=6),limits=c(1,9),labels=c("Low Legitimacy","High Legitimacy",
                                                                                  "Low Credibility","High Credibility",
                                                                                  "Low Cost", "High Cost")) +
  scale_color_manual(breaks = c("Low Level of Age","High Level of Age"),values=c("blue","red")) + 
  scale_shape_manual(values = c("Low Level of Age" = 19,"High Level of Age" = 17)) + 
  geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  coord_flip() +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=15),
        axis.title.x = element_text(size=20),
        axis.text.y = element_text(size=20, face="bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.position = "none")


############ Figure D1: Continuous Heterogenous Treatment Effects: Number of Followers ############ 
final_data$seed_user_screen <- as.character(final_data$seed_user_screen)
week_1 <- lm(post_count_hate_ratio_w1 ~ as.factor(treatment_arm)*follower_count + 
               pre_count_hate_ratio_w1 + anonymity + activity + age + 
               as.factor(seed_user_screen), data=final_data)

interact_plot(week_1, pred = follower_count, modx = treatment_arm,
              y.label="Ratio of Hateful Tweets over the Total Number of Tweets",
              x.label = "Number of Followers")



############ Figure D2: Continuous Heterogenous Treatment Effects: Activity of Profile ############ 
week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm*activity + 
               pre_count_hate_ratio_w1 + anonymity + age + follower_count + 
               as.factor(seed_user_screen),data=final_data)

interact_plot(week_1, pred = activity, modx = treatment_arm,
              y.label="Ratio of Hateful Tweets over the Total Number of Tweets",
              x.label = "Activity")


############ Figure D3: Continuous Heterogenous Treatment Effects: Age of the Profile ############ 
week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm*age + 
               pre_count_hate_ratio_w1 + anonymity + activity + follower_count + 
               as.factor(seed_user_screen),data=final_data)

interact_plot(week_1, pred = age, modx = treatment_arm,
              y.label="Ratio of Hateful Tweets over the Total Number of Tweets",
              x.label = "Age")



############ Figure D4: Continuous Heterogenous Treatment Effects: Anonymity of Profile ############ 
week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm*anonymity + 
               pre_count_hate_ratio_w1 + age + activity + follower_count +
               as.factor(seed_user_screen),data=final_data)

interact_plot(week_1, pred = anonymity, modx = treatment_arm,
              y.label="Ratio of Hateful Tweets over the Total Number of Tweets",
              x.label = "Anonymity")


############ Figure E1  ############ 
survey = read.csv("~/Desktop/replication_final/survey.csv",
                  stringsAsFactors = FALSE)
survey = survey[-c(1:3,54),]
survey = survey[,c("Duration..in.seconds.","Q84","Q85_2","Q85_13","Q88","Q89_15","Q89_16","Q93","Q94_17","Q94_18")]
colnames(survey) = c("duration","cost_choice","Q85_2_low_cost","Q85_13_high_cost",
                     "legitimacy_choice","Q89_15_legitimacy_high","Q89_16_legitimacy_low",
                     "credibility_choice","Q94_17_credibility_high","Q94_18_credibility_low")
rownames(survey) <- NULL
survey[survey==''] = NA

# cost: 
##### 1: high_cost
##### 2: low_cost
##### 3: indifferent

# legitimacy: 
##### 5: high_legitimacy
##### 6: low_legitimacy
##### 7: indifferent

# credibility: 
##### 8: high_credibility
##### 9: low_credibility
##### 10: indifferent

survey$cost_choice[survey$cost_choice=="1"] = "high_cost"
survey$cost_choice[survey$cost_choice=="2"] = "low_cost"
survey$cost_choice[survey$cost_choice=="3"] = "indifferent"

survey$legitimacy_choice[survey$legitimacy_choice=="5"] = "high_legitimacy"
survey$legitimacy_choice[survey$legitimacy_choice=="6"] = "low_legitimacy"
survey$legitimacy_choice[survey$legitimacy_choice=="7"] = "indifferent"

survey$credibility_choice[survey$credibility_choice=="8"] = "high_credibility"
survey$credibility_choice[survey$credibility_choice=="9"] = "low_credibility"
survey$credibility_choice[survey$credibility_choice=="10"] = "indifferent"

for(i in c(3,4,6,7,9,10)){
  survey[,i] = as.numeric(survey[,i])
}

survey_original = survey


survey = survey[,c("Q85_2_low_cost","Q85_13_high_cost",
                   "Q89_15_legitimacy_high","Q89_16_legitimacy_low",
                   "Q94_17_credibility_high","Q94_18_credibility_low")]

analysis_sd = as.data.frame(apply(survey, 2, sd, na.rm = TRUE)) 
analysis_mean = as.data.frame(apply(survey, 2, mean, na.rm = TRUE)) 

analysis = cbind(analysis_sd, analysis_mean)
analysis = setDT(analysis, keep.rownames = TRUE)[]
colnames(analysis) = c("questions","sd","mean")
analysis$question = c("cost","cost","legitimacy","legitimacy","credibility","credibility")

analysis$type = c("low","high",
                  "high","low",
                  "high","low")

ggplot(analysis, aes(x=question, y=mean, fill=type)) + 
  geom_bar(stat="identity", position=position_dodge()) +
  #geom_errorbar(aes(ymin=mean-1.96*sd, ymax=mean+1.96*sd), width=.2,
  #position=position_dodge(.9))+
  scale_y_continuous(breaks=seq(1,100,10)) +
  labs(title = "Sample Size = 50") + scale_fill_brewer(palette="Paired") + theme_minimal() +
  theme(axis.text.x = element_text(size=20, face="bold"))+
  theme(axis.text.y = element_text(size=15, face="bold"))


############ Figure F1: Alternative Outcome ############ 
week_1 <- lm(post_hate_w1 ~ treatment_arm + 
               pre_hate_w1 + pre_treatment_count_w1 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data)
week_4 <- lm(post_hate_w4 ~ treatment_arm + 
               pre_hate_w4 + pre_treatment_count_w4 + anonymity + activity + follower_count + age + 
               as.factor(seed_user_screen),data=final_data)

multiplot(week_1, week_4, coefficients=c("treatment_armlow_legitimacy", "treatment_armhigh_legitimacy", 
                                         "treatment_armlow_credibility", "treatment_armhigh_credibility",
                                         "treatment_armlow_cost", "treatment_armhigh_cost"), 
          newNames=c(treatment_armlow_legitimacy = "1: Low Legitimacy ", 
                     treatment_armhigh_legitimacy = "2: High Legitimacy ", 
                     treatment_armlow_credibility = "3: Low Credibility", 
                     treatment_armhigh_credibility = "4: High Credibility", 
                     treatment_armlow_cost = "5: Low Cost ",
                     treatment_armhigh_cost = "6: High Cost "), 
          names=c("  1 Week", "  4 Weeks "), title="", scales="free_x",
          decreasing = TRUE, lwdOuter = 0.4, lwdInner = 0.8,
          sort = "alphabetical", innerCI=1.645, outerCI=1.96, single=FALSE,  zeroType = 0,legend.position="none") +
  scale_color_manual(values=c("red", "blue", "seagreen")) +
  theme(axis.text.y = element_text(size=20, face="bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        legend.position="none", axis.line = element_line(colour = "black"), 
        text = element_text(size=15)) + 
  theme_bw() +
  xlab("Difference in the number of Hateful Words")+ geom_vline(aes(xintercept = 0), size = .5, linetype = "dashed")


############ TABlES G 1-4: Detailed Tables for the Figures 3-6 ############ 

# FORMATTING FUNCTION
formatStats <- function(fitUp=NULL,
                        roundArg=3,
                        coefListArg = coefList){
  coefMatch <- match(coefListArg, 
                     tidy(fitUp)$term)
  formatOut <- as.character(
    c(
      t(
        round(
          tidy(fitUp)[coefMatch,
                      c("estimate","std.error","p.value")]
          ,
          roundArg)
      )
    )
  )
  
  formatOut[is.na(formatOut)] <- ""              
  nCoefs <- length(coefListArg)
  formatOut[2+3*((1:(nCoefs))-1)] <- paste0("(",formatOut[2+3*((1:(nCoefs))-1)],")")
  formatOut[3+3*((1:(nCoefs))-1)] <- paste0("[",formatOut[3+3*((1:(nCoefs))-1)],"]")
  
  formatOut <- c(formatOut, as.character(fitUp$N))
  return(formatOut)
}

###  Table G1 Column 1: Testing hypothesis 1 one week after the treatment
coefList <- c('(Intercept)', 'treatment_binary', 'pre_count_hate_ratio_w1', 'anonymity', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_binary + 
                 pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Treated",
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G1_column_1 <- survReg()$reg_table

###  Table G1 Column 2: Testing hypothesis 1 four weeks after the treatmenr
coefList = c('(Intercept)', 'treatment_binary', 'pre_count_hate_ratio_w4', 'anonymity', 'activity',
             'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_4 <- lm(post_count_hate_ratio_w4 ~ treatment_binary + 
                 pre_count_hate_ratio_w4 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Treated",
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_4), nrow(week_4$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_4=week_4,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G1_column_2 <- survReg()$reg_table

###  Table G2 Column 1: Testing hypotheses 2-4 one week after the treatment
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w1', 'anonymity', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1 + pre_treatment_count_w1 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G2_column_1 <- survReg()$reg_table

###  Table G2 Column 2: Testing hypotheses 2-4 four weeks after the treatment
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w4', 'anonymity', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_4 <- lm(post_count_hate_ratio_w4 ~ treatment_arm + 
                 pre_count_hate_ratio_w4 + pre_treatment_count_w4 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format = c(formatStats(week_4), nrow(week_4$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_4=week_4,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G2_column_2 <- survReg()$reg_table

###  Table G3 Column 1: Hterogenous treatment effects
final_data_low_follower <- final_data[final_data$follower_count<=median(final_data$follower_count),]
coefList = c('(Intercept)', 
             'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
             "treatment_armlow_credibility", "treatment_armhigh_credibility",
             "treatment_armlow_cost", "treatment_armhigh_cost",
             'pre_count_hate_ratio_w1', 'anonymity', 'activity',
             'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data_low_follower)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G3_column_1 <- survReg()$reg_table

###  Table G3 Column 2: heterogenous treatment effects
final_data_high_follower = final_data[final_data$follower_count>median(final_data$follower_count),]
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w1', 'anonymity', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1 + anonymity + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data_high_follower)
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Anonymity",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G3_column_2 <- survReg()$reg_table

###  Table G4 Column 1: Hetergoenous treatment effects
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w1', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1 + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data[final_data$anonymity==0,])
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G4_column_1 <- survReg()$reg_table

#  Table G4 Column 2: Heterogeneous treatment effects
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w1', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1 + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data[final_data$anonymity==1,])
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G4_column_2 <- survReg()$reg_table

#  Table G4 Column 3
coefList <- c('(Intercept)', 
              'treatment_armlow_legitimacy', "treatment_armhigh_legitimacy", 
              "treatment_armlow_credibility", "treatment_armhigh_credibility",
              "treatment_armlow_cost", "treatment_armhigh_cost",
              'pre_count_hate_ratio_w1', 'activity',
              'follower_count', 'age')

survReg <- function(varIn = NULL){
  week_1 <- lm(post_count_hate_ratio_w1 ~ treatment_arm + 
                 pre_count_hate_ratio_w1  + activity + follower_count + age + 
                 as.factor(seed_user_screen),data=final_data[final_data$anonymity==2,])
  
  labVec <- c(rbind(c("(Intercept)",
                      "Low Legitimacy", 'High Legitimacy', 'Low Credibility', 'High Credibility',
                      'Low Costliness', 'High Costliness',
                      "Pre-Treatment Hate",
                      "Activity",
                      "Follower Count",
                      "Age"),
                    rep("(SE)", length(coefList)),
                    rep("[p]", length(coefList))), "N")
  # FORMATTING TABLES  
  reg_format <- c(formatStats(week_1), nrow(week_1$model))
  reg_table <- cbind(labVec,
                     reg_format)
  
  reg_table[reg_table=="()"] <- ""
  reg_table[reg_table=="[]"] <- ""
  
  colnames(reg_table) <- c("Variable",
                           "(1)")
  
  regOut <- list(week_1=week_1,
                 reg_table=reg_table)
  
  return(regOut)
}

table_G4_column_3 <- survReg()$reg_table




